# This file produces all results in the paper and the appendix
# The results were produced in R version 3.4.4 (2018-03-15) on x86_64-apple-darwin15.6.0 system
# The first part prepares data (lines 27 - 258)
# The second part replicates the results (lines > 261)
# The results are reproduced in the order they are mentioned in the paper

rm(list=ls())

# setwd() to current directory!

#if required packages are missing (using tested versions), the lines below will install them:
packages <- c('stm', 'openxlsx', 'ggplot2', 'gridExtra', 'grid', 'dplyr', 'data.table', 'lubridate', 'zoo', 
              'lmtest', 'mgcv', 'rdrobust', 'arm', 'stargazer', 'heavy', 'multiwayvcov', 'coda', 'bit64')
###########################################################################
# The following versions of the above packages were used
# Uncomment the line below to see how versions map to package names
# cbind(packages, c('1.2.2', '4.0.17', '2.2.1', '2.2.1', '3.4.4', '0.5.0', '1.10.4', '1.6.0', '1.8-0', '0.9-35', '1.8-23', '0.99', '1.9-3', '5.2', '0.38.1', '1.2.3', '0.19-1'))
###########################################################################

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]; if(length(new.packages)){install.packages(new.packages)}
rm(packages, new.packages)

# Auxilliary functions:
source("helperfunctions.R")

# Set seed for all simulations
set.seed('19521007')

##########################################
##### Load primary data
##########################################
# CrowdFlower codings data
cfdata <- fread(input = 'crowdflower_data.txt', header = T, stringsAsFactors = F, colClasses=list(character=20:22), encoding = "UTF-8")

# Stock market data
rts <- fread(input = 'rts.txt', header = T, stringsAsFactors = F)

# Exchange rate data
exc <- read.table(file = 'exchange_rate.txt', header = T, sep = '\t', stringsAsFactors = F)

# Oil price data
oil <- fread(file = 'brent_oil.txt', header = T, sep = '\t', stringsAsFactors = F)

# Inflation data
inflation <- fread(file = 'inflation.txt', header = T, sep = '\t', stringsAsFactors = F)

# Channnel One news transcripts
data <- fread(file = 'transcripts.txt', header = T, sep = '\t', stringsAsFactors = F, encoding = "UTF-8")

# CrowdFlower data on event sentiment (for Appendix B3)
distortion_data <- fread(input = 'distortion_data.txt', header = T, stringsAsFactors = F, encoding = "UTF-8")

# Load processed stm topics object
thetas <- get(load('stm_topics_object.RData'))

##########################################
##### Prepare CrowdFlower data
##########################################
### Clean: remove responses with "not mentioned" and any other option
cfdata <- data.table(cfdata)
d <- subset(cfdata, !X_unit_id %in% cfdata$X_unit_id[grepl(pattern = '1\n', x = cfdata$negative) | grepl(pattern = '1\n', x = cfdata$positive)])

### Find fragments about Russian economy: True if the weighted majority of coders code a fragment as about Russian economy
rusec <- d[, .('denom' = sum(X_trust), 'numer' = sum(rus_ec*X_trust), 'frac' = sum(rus_ec*X_trust)/sum(X_trust) ), by = 'X_unit_id']
rusec_unit_ids <- subset(rusec, frac > 0.5)$X_unit_id

### Parse responses with multiple answers (event attribution)
d <- parse_into_option_dummies(data = d, variable = 'negative')
d <- parse_into_option_dummies(data = d, variable = 'positive')

### Aggregate sentiment results
# Compute weighted proportions of sentiment values for each fragment (weighted by coders' trust scores)
sent_prop <- d[, .('neg_w_prop' = sum(X_trust[sentiment == -1 & !is.na(sentiment)])/sum(X_trust[!is.na(sentiment)]), 'neut_w_prop' = sum(X_trust[sentiment == 0 & !is.na(sentiment)])/sum(X_trust[!is.na(sentiment)]), 'pos_w_prop' = sum(X_trust[sentiment == 1 & !is.na(sentiment)])/sum(X_trust[!is.na(sentiment)])), by = 'X_unit_id']

# Find sentiment values with the weighted majority of responses (and coding confidence)
sent_w_majority <- data.frame('X_unit_id' = sent_prop$X_unit_id, 'sentiment' = sapply(1:nrow(sent_prop), function(k) ifelse(sent_prop$neg_w_prop[k] > 0.5, -1, ifelse(sent_prop$neut_w_prop[k] > 0.5, 0, ifelse(sent_prop$pos_w_prop[k] > 0.5, 1, NA) )) ), stringsAsFactors = F)
sent_w_majority <- make_sentiment_confidence(data = sent_w_majority, reference_data = sent_prop)

### Aggregate event results
# Compute weighted proportions of event atribution (weighted by coders' trust scores)
options_w_prop <- d[, .('neg_not_mentioned' = sum(X_trust[neg_not_mentioned == 1])/sum(X_trust[rus_ec == 1]), 'neg_putin' = sum(X_trust[neg_putin == 1])/sum(X_trust[rus_ec == 1]), 'neg_rugov' = sum(X_trust[neg_rugov == 1])/sum(X_trust[rus_ec == 1]), 'neg_rubis' = sum(X_trust[neg_rubis == 1])/sum(X_trust[rus_ec == 1]), 'neg_forgov' = sum(X_trust[neg_forgov == 1])/sum(X_trust[rus_ec == 1]), 'neg_forbis' = sum(X_trust[neg_forbis == 1])/sum(X_trust[rus_ec == 1]), 'neg_forec' = sum(X_trust[neg_forec == 1])/sum(X_trust[rus_ec == 1]), 'neg_other' = sum(X_trust[neg_other == 1])/sum(X_trust[rus_ec == 1]), 'pos_not_mentioned' = sum(X_trust[pos_not_mentioned == 1])/sum(X_trust[rus_ec == 1]), 'pos_putin' = sum(X_trust[pos_putin == 1])/sum(X_trust[rus_ec == 1]), 'pos_rugov' = sum(X_trust[pos_rugov == 1])/sum(X_trust[rus_ec == 1]), 'pos_rubis' = sum(X_trust[pos_rubis == 1])/sum(X_trust[rus_ec == 1]), 'pos_forgov' = sum(X_trust[pos_forgov == 1])/sum(X_trust[rus_ec == 1]), 'pos_forbis' = sum(X_trust[pos_forbis == 1])/sum(X_trust[rus_ec == 1]), 'pos_forec' = sum(X_trust[pos_forec == 1])/sum(X_trust[rus_ec == 1]), 'pos_other' = sum(X_trust[pos_other == 1])/sum(X_trust[rus_ec == 1])), by = 'X_unit_id']

# Make dummies: did a weighted majority of coders attribute an event to a given category?
options_w_prop <- make_option_majority_dummies(data = options_w_prop)

### Join aggregated sentiment and event results
dat <- merge(x = options_w_prop, y = sent_w_majority, by = 'X_unit_id')

# Add id, date, and text
d_sm <- subset(d, select = c(X_unit_id, date, text))
d_sm <- d_sm[!duplicated(d_sm)]
dat <- merge(x = dat, y = d_sm, by = 'X_unit_id')
cat('There are ', nrow(dat), ' coded fragments\n')

# Exclude cases with no events
dat <- subset(dat, neg_not_mentioned != 1 | pos_not_mentioned != 1)

### Fix date format
dat$year_month <- substr(x = dat$date, start = 1, stop = 6)
dat$year <- as.numeric(substr(x = dat$date, start = 1, stop = 4))
dat$month <- as.numeric(substr(x = dat$date, start = 5, stop = 6))
dat$day <- as.numeric(substr(x = dat$date, start = 7, stop = 8))
dat$date <- as.Date(x = paste0(dat$year, '-', dat$month, '-', dat$day), format = '%Y-%m-%d')

timdat <- expand.grid('year' = 1999:2016, 'month' = 1:12)
timdat <- timdat[order(timdat$year, timdat$month, decreasing = F),]
rownames(timdat) <- NULL
timdat$time <- 1:nrow(timdat)
dat <- merge(x = dat, y = timdat, by = c("year", "month"), all.x = T)

### Remove obs NOT about Russian economy
dat <- subset(dat, X_unit_id %in% rusec_unit_ids)
cat('There are ', nrow(dat), ' fragments about Russian economy\n') # 6774 total, 4812 about Russian economy

#### Restructure data for event attribution analysis: stack positive and negative fragments on top of each other
dat_pos <- subset(dat, select = -c(neg_putin, neg_putin_bin, neg_rugov, neg_rugov_bin, neg_rubis, neg_rubis_bin, neg_forgov, neg_forgov_bin, neg_forec, neg_forec_bin, neg_other, neg_other_bin, neg_not_mentioned))
dat_pos$outcome_type <- 'pos'

dat_neg <- subset(dat, select = -c(pos_putin, pos_putin_bin, pos_rugov, pos_rugov_bin, pos_rubis, pos_rubis_bin, pos_forgov, pos_forgov_bin, pos_forec, pos_forec_bin, pos_other, pos_other_bin, pos_not_mentioned))
dat_neg$outcome_type <- 'neg'

names(dat_pos) <- gsub(pattern = 'pos_', replacement = '', x = names(dat_pos))
names(dat_neg) <- gsub(pattern = 'neg_', replacement = '', x = names(dat_neg))

dat_stacked <- rbind(dat_pos, dat_neg)

dat_stacked$event_happened <- ifelse((dat_stacked$putin_bin + dat_stacked$rugov_bin + dat_stacked$rubis_bin + dat_stacked$forgov_bin + dat_stacked$forec_bin + dat_stacked$other_bin) > 0, 1, 0)
dat_stacked$neg_outcome <- ifelse(dat_stacked$outcome_type == 'neg', 1, 0)

### Add mentions of categories
rugov_string <- '(медведев)|(касьянов)|(фрадков)|(зубков)|(кудрин)|(силуанов)|(греф)|(улюкаев)|(геращенко)|(игнатьев)|(набиуллин)|(белоусов)'
putin_string <- '(путин)'
forgov_string <- '(сша)|(вашингтон)|(обама)|(буш)|(великобритани)|(лондон)|(блэр)|(браун)|(кэмерон)|(франци)|(париж)|(ширак)|(саркоз)|(олланд)|(германи)|(берлин)|(меркель)|(шредер)|(шрёдер)|(итали)|(рим )|(амато )|(берлускони)|(проди )|(монти )|(летта )|(ренци )|(европ.{1,4} )'
forec_string <- '(доллар)|(евро. )|(цен.{,15}нефт)'

dat$mention_putin <- count_mentions(string = putin_string, text = dat$text)
dat$mention_rugov <- count_mentions(string = rugov_string, text = dat$text)
dat$mention_forgov <- count_mentions(string = forgov_string, text = dat$text)
dat$mention_forec <- count_mentions(string = forec_string, text = dat$text)

dat$mention_bin_putin <- ifelse(dat$mention_putin > 0, 1, 0)
dat$mention_bin_rugov <- ifelse(dat$mention_rugov > 0, 1, 0)
dat$mention_bin_forgov <- ifelse(dat$mention_forgov > 0, 1, 0)
dat$mention_bin_forec <- ifelse(dat$mention_forec > 0, 1, 0)

dat_stacked$mention_putin <- count_mentions(string = putin_string, text = dat_stacked$text)
dat_stacked$mention_rugov <- count_mentions(string = rugov_string, text = dat_stacked$text)
dat_stacked$mention_forgov <- count_mentions(string = forgov_string, text = dat_stacked$text)
dat_stacked$mention_forec <- count_mentions(string = forec_string, text = dat_stacked$text)

dat_stacked$mention_bin_putin <- ifelse(dat_stacked$mention_putin > 0, 1, 0)
dat_stacked$mention_bin_rugov <- ifelse(dat_stacked$mention_rugov > 0, 1, 0)
dat_stacked$mention_bin_forgov <- ifelse(dat_stacked$mention_forgov > 0, 1, 0)
dat_stacked$mention_bin_forec <- ifelse(dat_stacked$mention_forec > 0, 1, 0)

# Add putin approval ratings from (https://www.levada.ru/indikatory/odobrenie-organov-vlasti/)
put <- read.table(file = 'putin.txt', header = T, sep = '\t', stringsAsFactors = F)
put$month <- substr(x = put$Date, start = 1, stop = 2)
put$year <- substr(x = put$Date, start = 4, stop = 7)
put$year_month <- paste0(put$year, put$month)
put <- subset(put, select = c(Approve, Diaspprove, year_month))
names(put) <- c('putin_approve', 'putin_disapprove', 'year_month')
dat <- merge(x = dat, y = put, by = 'year_month', all.x = T)
dat_stacked <- merge(x = dat_stacked, y = put, by = 'year_month', all.x = T)

# Add binary indicators for pos/neg events to non-stacked data
dat$neg_event_mentioned <- dat$neg_putin_bin + dat$neg_rugov_bin + dat$neg_rubis_bin + dat$neg_forec_bin + dat$neg_forgov_bin + dat$neg_other_bin
dat$pos_event_mentioned <- dat$pos_putin_bin + dat$pos_rugov_bin + dat$pos_rubis_bin + dat$pos_forec_bin + dat$pos_forgov_bin + dat$pos_other_bin
dat$neg_event_mentioned <- ifelse(dat$neg_event_mentioned > 0, 1, 0)
dat$pos_event_mentioned <- ifelse(dat$pos_event_mentioned > 0, 1, 0)

##########################################
##### Prepare data for direct and associational analysis
##########################################
d <- list()

### Merge with lemmatized text 
lemmas <- subset(cfdata, select = c(X_unit_id, lemma))
lemmas <- lemmas[!duplicated(lemmas),]
dat_stacked <- merge(x = dat_stacked, y = lemmas, by = 'X_unit_id')
dat <- merge(x = dat, y = lemmas, by = 'X_unit_id')

### Data frame for direct attribution (event level)
dat_stacked <- subset(dat_stacked, event_happened == 1)
dat_stacked$day <- as.numeric(dat_stacked$date - min(dat_stacked$date))
dat_stacked$market    <- grepl("фондовые|биржи", dat_stacked$lemma)
dat_stacked$oil_price <- grepl("нефт", dat_stacked$lemma)*grepl("цен", dat_stacked$lemma)
dat_stacked$currency  <- grepl("доллар|валют", dat_stacked$lemma)
dat_stacked$inflation <- grepl("инфляц", dat_stacked$lemma)
dat_stacked$oil <- grepl("нефт", dat_stacked$lemma)
dat_stacked$sanctions <- grepl("санкц", dat_stacked$lemma)

dat_stacked$oil <- grepl("нефт", dat_stacked$lemma)
dat_stacked$sanctions <- grepl("санкц", dat_stacked$lemma)

dat_stacked <- dat_stacked[order(dat_stacked$date, decreasing = F),]

d[[1]] <- as.data.frame(dat_stacked)

### Data frame for associational attribution (chunk level)
dat$date <- ymd(dat$date)
dat <- dat[order(dat$date), ]
dat$day <- as.numeric(dat$date - min(dat$date))
dat$neg_outcome <- 1*(dat$sentiment < 1)
dat <- subset(dat, pos_event_mentioned==1 | neg_event_mentioned==1)

dat$oil <- grepl("нефт", dat$lemma)
dat$sanctions <- grepl("санкц", dat$lemma)

dat <- dat[order(dat$date, decreasing = F),]

d[[2]] <- as.data.frame(dat)

##########################################
##### Prepare data for censorship analysis
##########################################
# Channel One transcripts of economic news
data$main_text <- tolower(data$main_text)
data$mention.rts <- grepl("фондов|бирж|ртс", data$main_text)
data$mention.exc <- grepl("доллар", data$main_text)&grepl("валют|рубл", data$main_text)
data$mention.oil <- grepl("нефт", data$main_text)&grepl("цен", data$main_text)
data$mention.inf <- grepl("инфляц", data$main_text)
data$id <- 1:nrow(data)
data$N  <- 1 #number of reports
data$year <- year(data$date)
data$month <- month(data$date)

# Aggregate Channel One word counts by day
adata <-  aggregate(cbind(mention.rts, mention.exc, mention.oil, mention.inf, N) ~ date + year + month, drop = TRUE, FUN = sum, na.rm = TRUE, data = data)
adata$date <- ymd(adata$date)

# Stock market data
rts$date <- dmy(rts$Date)
rts <- rts[order(rts$date), ]
rts$rts.value <- rts$Close
rts$rts.change <- log(rts$Close) - log(dplyr::lag(rts$Close, n = 1))
rts <- subset(rts, select = c(date, rts.value, rts.change))

# Exchange rate data
exc$date <- as.Date(as.character(gsub(",", "", exc$Date)), format = "%B %d %Y")
exc <- unique(exc[order(exc$date), ])
exc$exc.value <- exc$Price
exc$exc.change <- log(exc$Price) - log(dplyr::lag(exc$Price, n = 1))
exc <- subset(exc, select = c(date, exc.value, exc.change))

# Oil price data
oil$date <- mdy(oil$date)
oil <- oil[order(oil$date), ]
oil$oil.value <- oil$price
oil$oil.change <- log(oil$oil.value) - log(dplyr::lag(oil$oil.value, n = 1))
oil <- subset(oil, select = c(date, oil.change, oil.value))

# Inflation data
inflation$year <- year(ymd(inflation$DATE))
inflation$month <- month(ymd(inflation$DATE))
inflation$CPI_lag <- dplyr::lag(inflation$CPGDTT01RUM661N, n = 1)
inflation$inf.change <- with(inflation, CPGDTT01RUM661N - CPI_lag)
inflation <- subset(inflation, select = c(year, month, inf.change))

# Channel One and Economic data: Merge economic data onto the *daily* news data
econ_data <- Reduce(function(...) merge(..., by = "date", all.x=TRUE), list(adata, rts, exc, oil))
econ_data <- merge(econ_data, inflation, by = c("year", "month"), all.x = TRUE)
econ_data$year_month <- paste(econ_data$year, econ_data$month, sep = "_")
econ_data$quarter <- quarter(econ_data$date)

# Make a dataset for Figure 2
data_monthly <- group_by(econ_data, year_month)%>%summarise(inf.change = mean(inf.change, na.rm = TRUE), mention.inf_count = sum(mention.inf, na.rm = TRUE), mention.inf = mean(mention.inf > 0, na.rm = TRUE))
data_monthly$mention.inf <- data_monthly$mention.inf/max(data_monthly$mention.inf)

# Make a dataset for Figure 4
dd4 <- list(
  aggregate(cbind(putin_bin, rugov_bin, forec_bin, forgov_bin) ~ neg_outcome, data = d[[1]], FUN = sum, na.rm = TRUE)[,-1],
  aggregate(cbind(mention_bin_putin, mention_bin_rugov, mention_bin_forec, mention_bin_forgov) ~ neg_outcome, data = d[[2]], FUN = sum, na.rm = TRUE)[,-1]
)
categories <- c("Vladimir \n Putin", "Russian \n officials", "Foreign \n economy", "Foreign \n powers")

cat <- c("Attributed directly to ...", "Attributed by association to ...")
out4 <- lapply(dd4, function(d) do.call("rbind", lapply(1:2, function(z) data.frame(neg_event = factor(z - 1), var = 1:ncol(d), t(sapply(d[z,], function(x) unlist(prop.test(x, sum(d[z,]), correct = TRUE)[c("estimate", "conf.int")])))))))

out4 <- data.frame(type = rep(cat, c(8, 8)), do.call("rbind", out4))
out4$var <- c(rep(categories, 2), rep(categories, 2))
out4$var <- factor(out4$var, levels = categories)
out4$type <- factor(out4$type, levels = cat)

# Make a dataset for Figure 5
out5 <- data.frame(cbind(rep(1:2, unlist(lapply(fit5, length))), rep(1:4, 2), do.call("rbind", lapply(fit5, function(x) do.call("rbind", lapply(x, ame))))))
out5$X2 <- rep(categories, 2)
out5$X2 <- factor(out5$X2, levels = categories)
labs <- c("Attributed directly ~~~", "Attributed by association ~~~")


# Aggregate Putin approval data by month for Figure 7
K <- 1
data7 <- d[[1]]
data7 <- data7[order(data7$year, data7$month), ]
data7$putin_approval <- data7$putin_approve 
a <- aggregate(putin_approval ~ year_month, data = data7, FUN = mean)
a$putin_approval_lag <- dplyr::lag(a$putin_approval, n = K)
a$putin_approval_lead <- dplyr::lead(a$putin_approval, n = K)
a$putin_approval_change <- (a$putin_approval - a$putin_approval_lag)/a$putin_approval_lag
a$putin_approval_change_lag <- dplyr::lag(a$putin_approval_change, n = K)
data7 <- merge(data7, subset(a, select = -c(putin_approval)), by = c("year_month"), all.x = TRUE)
data7$x <- range01(data7$putin_approval_lag)


#########################################################################
##### Figure 2: Coverage of economic events on Channel 1
#########################################################################
plot_coverage(K = 5, nb = FALSE)

#########################################################################
##### Figure 3: Coverage intensity around market benchmarks
#########################################################################
plot_rdd(0.01)

#########################################################################
##### Figure 4: Attribution of good vs bad economic news
#########################################################################
figure4 <- ggplot(out4, aes(x=var, y=estimate.p, fill=neg_event)) + geom_bar(aes(alpha = neg_event), position=position_dodge(), stat="identity") + geom_errorbar(aes(x = var, ymin=conf.int1, ymax=conf.int2), width=.25, position=position_dodge(.9), lwd = 1.5) + theme_bw() + ylab("Proportion attributed") + xlab("") + scale_y_continuous(breaks = seq(0, .6, by = 0.1)) + theme(
  axis.title = element_text(size=12,face="bold"),
  axis.title.x = element_text(vjust=-.75, margin=margin(0,0,0,0)),
  axis.text.x = element_text(size=10, face = "bold", colour = "black"),
  axis.text.y = element_text(size=12),
  legend.position = "top",
  legend.key = element_rect(colour = "white"),
  legend.text = element_text(size = 12, face = "bold"),
  legend.title=element_blank(),
  legend.key.height = unit(1, "cm"),
  panel.grid.major.x = element_blank(),
  panel.grid.minor.y = element_blank()
) 

figure4 <- figure4 +  facet_grid(. ~ type) + scale_alpha_manual("legend", values = c("0" = 0.4, "1" = 0.8), labels = c("Good news  ", "Bad news  ")) + scale_fill_manual("legend", values = c("0" = "dark blue", "1" = "dark red"), labels = c("Good news  ", "Bad news  ")) + theme(strip.text.x = element_text(size = 12, face = "bold"), strip.background = element_blank()) 

figure4

#########################################################################
##### Figure 5: Predicted relative attributions of economic news
#########################################################################

### Make computations for Figure 5
bfit <- bayesglm(putin_bin ~ I(1 - neg_outcome) + factor(year(date)) + factor(month(date)) + factor(weekdays(date)), data = d[[1]], family = binomial(link = probit), keep.order = FALSE)

fit5 <- NULL

# Direct
fit5[[1]] <- list(
  update(bfit, putin_bin ~ .),
  update(bfit, rugov_bin ~ .),
  update(bfit, forec_bin ~ .),
  update(bfit, forgov_bin ~ .)
)

# Associational (chunk level)
fit5[[2]] <- list(
  update(bfit, mention_bin_putin ~ ., data = d[[2]]),
  update(bfit, mention_bin_rugov ~ ., data = d[[2]]),
  update(bfit, mention_bin_forec ~ ., data = d[[2]]),
  update(bfit, mention_bin_forgov ~ ., data = d[[2]])
)


### Make Figure 5
figure5 <- ggplot(data = out5) + geom_pointrange(aes(x = X2, y = X3, ymin = X4, ymax = X5, colour = factor(X1), shape = factor(X1)), position = position_dodge(width = .4), size = 1.25) + theme_bw() + ylab("Relative attribution") + xlab("") + scale_colour_manual(labels=labs, values = c("black", "grey50"), guide = guide_legend(nrow=1)) + scale_shape_manual(labels=labs, values = c(16:(15+2)), guide = guide_legend(nrow=1)) +
  theme(
    axis.title = element_text(size=12,face="bold"),
    axis.title.x = element_text(vjust=-.75, margin=margin(0,0,0,0)),
    axis.text.x = element_text(size=12, face = "bold", colour = "black"),
    axis.text.y = element_text(size=12),
    legend.position = "top",
    legend.key = element_rect(colour = "white"),
    legend.text = element_text(size = 12),
    legend.title=element_blank(),
    legend.key.height = unit(1, "cm"),
    panel.grid.major.x = element_blank()
  ) + geom_hline(aes(yintercept=0), linetype = "dotted")

figure5

#########################################################################
##### Figure 6: Direct attribution of economic news over time
#########################################################################
elections <- ymd(c("1999-12-19", "2000-03-14", "2003-12-07", "2004-03-14", "2007-12-02", "2008-03-02", "2011-12-04", "2012-03-04"))

fit6 <- gam(mention_bin_putin ~ neg_outcome + s(as.numeric(date), by = factor(neg_outcome), bs = "tp", k = 15), data =  d[[1]], family = binomial(link = probit))

het_plot_show(fit6)


#########################################################################
##### Figure 7: Relative direct attribution conditional on Putin's approval
#########################################################################
# Fit the model
fit7 <- NULL
fit7[[1]] <- bayesglm(putin_bin ~ I(1 - neg_outcome)*x + factor(year(date)) + factor(month(date)) + factor(weekdays(date)), data = data7, family=binomial(link=probit), keep.order=FALSE, control=list(epsilon = 1e-300, maxit = 1000))
fit7[[2]] <- update(fit7[[1]], rugov_bin ~ .)
fit7[[3]] <- update(fit7[[1]], forec_bin ~ .)
fit7[[4]] <- update(fit7[[1]], forgov_bin ~ .)

approval_out(fit7)

#########################################################################
##### Appendix B1 Figure 2: Censorship analysis using counts of reports
#########################################################################
plot_coverage(K = 5, nb = TRUE)

#########################################################################
##### Appendix B2 Figure 3: Censorship analysis without outlier exclusion
#########################################################################
plot_coverage(K = Inf, nb = FALSE)

#########################################################################
##### Appendix B2 Figure 4: Censorship analysis with economic indicators 
##### restricted to two standard deviations from their means
#########################################################################
plot_coverage(K = 2, nb = FALSE)

#########################################################################
##### Appendix B3 Table 2: Distortion probit regressions
#########################################################################
z <- unique(distortion_data$indicator)[c(2, 1, 4, 3)]

## Probit models 
fit_b3tab2 <- NULL
fit_b3tab2[[1]] <- lapply(z, function(z) glm(I(neg_context == 1) ~ scale(change), data = distortion_data, family = binomial(link = probit), subset = indicator == z))
fit_b3tab2[[2]] <- lapply(z, function(z) glm(I(trend == 1) ~ scale(change), data = subset(distortion_data, trend!=9), family = binomial(link = probit), subset = indicator == z))
fit_b3tab2[[3]] <- lapply(z, function(z) glm(I(sentiment == 1) ~ scale(change), data = subset(distortion_data, sentiment != 0), family = binomial(link = probit), subset = indicator == z))

## Create table output, concatenate manually
outtex_b3tab2(fit_b3tab2[[1]])
outtex_b3tab2(fit_b3tab2[[2]])
outtex_b3tab2(fit_b3tab2[[3]])

#########################################################################
##### Appendix B3 Figure 5: Positivity of indicator 
##### vs predicted positivity/negativity of news reports
#########################################################################
# Change direction of inflation measure (as described in the text)
distortion_data$change[distortion_data$indicator=="inf"] <- -distortion_data$change[distortion_data$indicator=="inf"]
distortion_data$change[distortion_data$indicator=="exc"] <- -distortion_data$change[distortion_data$indicator=="exc"]
distortion_data <- mutate(distortion_data, change_scaled = scale(change))

fit_b3fig5 <- NULL
fit_b3fig5[[1]] <- gam(I(neg_context == 1) ~ indicator + te(change_scaled, k=3), data = distortion_data, family = binomial(link = probit))
fit_b3fig5[[2]] <- gam(I(trend == 1) ~ indicator + te(change_scaled, k=3), data = distortion_data, family = binomial(link = probit), subset = trend!=9)
fit_b3fig5[[3]] <- gam(I(sentiment == 1) ~ indicator + te(change_scaled, k=3), data = distortion_data, family = binomial(link = probit), sentiment!=0)

plot_distortion(fit_b3fig5)

#########################################################################
##### Appendix C1 Table 3: Probit regression for the binary variable Good news
#########################################################################
fit_c1_tab3_1 <- fit_c1_tab3_2 <- fit_c1_tab3_3 <- fit_c1_tab3_4 <- fit_c1_tab3_5 <- NULL

# Panel A: All sample
for(i in 1:2) fit_c1_tab3_1[[i]] <- lapply(fit5[[i]], function(x) coeftest(update(x, data = d[[i]]), multiwayvcov::cluster.vcov(update(x, data = d[[i]]), ~ time)))

# Panel B: Excluding reports on oil and sanctions
dd <- lapply(d, subset, !(oil|sanctions))
for(i in 1:2) fit_c1_tab3_2[[i]] <- lapply(fit5[[i]], function(x) coeftest(update(x, data = dd[[i]]), multiwayvcov::cluster.vcov(update(x, data = dd[[i]]), ~ time)))

# Panel C: Excluding recession years 2008 and 2009
dd <- lapply(d, subset, !year%in%c(2008, 2009, 2014))
for(i in 1:2) fit_c1_tab3_3[[i]] <- lapply(fit5[[i]], function(x) coeftest(update(x, data = dd[[i]]), multiwayvcov::cluster.vcov(update(x, data = dd[[i]]), ~ time)))

# Panel D: Pre-Crimean annexation
dd <- lapply(d, subset, date < ymd("2014-03-18"))
for(i in 1:2) fit_c1_tab3_4[[i]] <- lapply(fit5[[i]], function(x) coeftest(update(x, data = dd[[i]]), multiwayvcov::cluster.vcov(update(x, data = dd[[i]]), ~ time)))

# Panel E: Post-Crimean annexation
dd <- lapply(d, subset, date > ymd("2014-03-18"))
for(i in 1:2) fit_c1_tab3_5[[i]] <- lapply(fit5[[i]], function(x) coeftest(update(x, data = dd[[i]]), multiwayvcov::cluster.vcov(update(x, data = dd[[i]]), ~ time)))

# Print LaTeX code
for(i in 1:2) stargazer(fit_c1_tab3_1[[i]], omit = c("factor", "Constant"), digits=2, dep.var.labels = "", dep.var.labels.include = FALSE,  no.space = FALSE, digits.extra = 0, covariate.labels = c("Positive event"), table.layout = "t")
for(i in 1:2) stargazer(fit_c1_tab3_2[[i]], omit = c("factor", "Constant"), digits=2, dep.var.labels = "", dep.var.labels.include = FALSE,  no.space = FALSE, digits.extra = 0, covariate.labels = c("Positive event"), table.layout = "t")
for(i in 1:2) stargazer(fit_c1_tab3_3[[i]], omit = c("factor", "Constant"), digits=2, dep.var.labels = "", dep.var.labels.include = FALSE,  no.space = FALSE, digits.extra = 0, covariate.labels = c("Positive event"), table.layout = "t")
for(i in 1:2) stargazer(fit_c1_tab3_4[[i]], omit = c("factor", "Constant"), digits=2, dep.var.labels = "", dep.var.labels.include = FALSE,  no.space = FALSE, digits.extra = 0, covariate.labels = c("Positive event"), table.layout = "t")
for(i in 1:2) stargazer(fit_c1_tab3_5[[i]], omit = c("factor", "Constant"), digits=2, dep.var.labels = "", dep.var.labels.include = FALSE,  no.space = FALSE, digits.extra = 0, covariate.labels = c("Positive event"), table.layout = "t")

#########################################################################
##### Appendix C2 Figure 6: Top 10 topics by event-sentiment
#########################################################################
### NOTE: STM estimation takes over 4 hours on a 1-core machine. 
# One can replicate the results using the commented-out code. 
# For the ease of replication, we provide the results of simulations using 
# pre-trained models in 'stm_topics_object.RData'.
# 
# num.it = 1000
# #--- load data
# topics <- fread(input = 'labeled_stm_topics.txt', header = T, stringsAsFactors = F)
# topics$topic_num <- as.numeric(topics$topic_num)
# models <- get(load('stm_models.RData'))
# out <- get(load('stm_data_object.RData')) 
# 
# #--- setup for computations
# num.doc = nrow(out$meta)
# best.mod <- models$runout[[as.numeric('4')]]
# grand.mean <- list()
# grand.mean.ql <- list()
# grand.mean.qu <- list()
# 
# #--- start computations
# st <- proc.time()
# for (i in 1:2) {
#   cat('Iter', i, 'started\n')
#   new.design <- best.mod$settings$covariates$X
#   if (i == 1) {
#     new.design[, 'neg_event_mentioned'] <- 1
#     new.design[, 'pos_event_mentioned'] <- 0
#   }
#   if (i == 2) {
#     new.design[, 'neg_event_mentioned'] <- 0
#     new.design[, 'pos_event_mentioned'] <- 1
#   }
#   
#   eta.list <- list()
#   theta.list <- list()
#   for (j in 1:num.doc) {
#     eta.list[[j]] <- mvtnorm::rmvnorm(n = num.it, mean = new.design[j,] %*% best.mod$mu$gamma, sigma = best.mod$sigma)
#     theta.list[[j]] <- exp(eta.list[[j]]) / (rowSums(exp(eta.list[[j]])) + 1)
#     theta.list[[j]] <- cbind(theta.list[[j]], 1 / (rowSums(exp(eta.list[[j]])) + 1))
#   }
#   
#   mean.by.iter <- lapply(1:num.it, function(j) colMeans(do.call('rbind',  lapply(theta.list, function(k) k[j,]) )) )
#   mean.by.iter <- do.call('rbind', mean.by.iter)
#   
#   grand.mean[[i]] <- apply(mean.by.iter, 2, mean)
#   grand.mean.ql[[i]] <- apply(mean.by.iter, 2, quantile, probs = 0.025)
#   grand.mean.qu[[i]] <- apply(mean.by.iter, 2, quantile, probs = 0.975)
#   cat('Iter', i, 'done\n')
#   Sys.sleep(1)
# }
# fin <- proc.time()
# 
# # For plotting: plot
# num.topics = length(grand.mean[[1]])
# thetas <- lapply(1:length(grand.mean.ql), function(k) data.frame('topic_num' = 1:num.topics, 'theta_mean' = grand.mean[[k]], 'theta_ql' = grand.mean.ql[[k]], 'theta_qu' = grand.mean.qu[[k]]) )
# thetas <- lapply(thetas, function(k) merge(x = k, y = subset(topics, select = c(topic_num, lab)), by = 'topic_num'))
# thetas <- lapply(thetas, function(k) k[order(k$theta_mean, decreasing = T),])
# names(thetas) <- c('neg', 'pos')

# Plot top 10 topics for negative/positive events
par(mfrow = c(1,2), mar = c(12,6,3,1)) 
p1 <- barplot(height = thetas$neg[1:10,]$theta_mean, ylim = c(0, .05), main = 'Top 10: negative events only', names.arg = thetas$neg$lab[1:10], las = 2, col = 'lightskyblue3')
segments(x0 = p1, y0 = thetas$neg[1:10,]$theta_ql, x1 = p1, y1 = thetas$neg[1:10,]$theta_qu, lwd = 1.7)
p2 <- barplot(height = thetas$pos[1:10,]$theta_mean, ylim = c(0, .05), main = 'Top 10: positive events only', names.arg = thetas$pos$lab[1:10], las = 2, col = 'lightsalmon')
segments(x0 = p2, y0 = thetas$pos[1:10,]$theta_ql, x1 = p2, y1 = thetas$pos[1:10,]$theta_qu, lwd = 1.7)
par(mfrow = c(1,1))

#########################################################################
##### Appendix D1 Figure 7: Temporal variation in the associational attribution
#########################################################################
fit_d7 <- update(fit6, data = d[[2]])

het_assoc_plot_show(fit_d7)

#########################################################################
##### Appendix D2 Table 4: Coefficient estimates for heterogeneity 
##### conditional on Putin’s approval
#########################################################################
rfit <- lapply(fit7, function(x) lmtest::coeftest(x, multiwayvcov::cluster.vcov(x, ~ year_month)))

stargazer(rfit, omit = c("factor", "Constant"), ci = FALSE, digits=2, dep.var.caption = NULL, style = "apsr", dep.var.labels = "", dep.var.labels.include = FALSE,  no.space = FALSE, digits.extra = 0, notes = "{\\footnotesize Significance levels: $^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05}", 
          notes.append=FALSE, keep.stat = c("ll", "n"), title = "Regression coefficients for probit models with interactions. All specifications include year, month, and weekday fixed effects (N = 4,234). Standard errors (in parentheses) clustered by year-month.", 
          table.placement="h!", label="tab:attribution", font.size = "small", star.cutoffs = rev(c(0.001, 0.01, 0.05)), column.labels = paste("\\multicolumn{1}{C{2cm}}{", c("Vlamir Putin", "Russian officials", "Foreign economy", "Foreign powers"), "}"), align = TRUE, 
          covariate.labels = c("Good news", "Approval", "Good news$\\times$Approval"), type = "latex")

#########################################################################
##### Appendix D3 Figure 8: Relative direct attribution depending 
##### on the change in Putin’s approval in prior month
#########################################################################
data7$x <- range01(data7$putin_approval_change_lag)
approval_out(lapply(fit7, function(x) eval(x$call)), labs = c("Putin's approval decreased ~~~~~~", "Putin's approval increased ~~~~~~"))

